package org.ws.httphelper.example.simple;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.ws.httphelper.HttpHelper;
import org.ws.httphelper.builder.HttpHelperBuilder;
import org.ws.httphelper.model.field.ExpressionType;
import org.ws.httphelper.model.field.ParseField;
import org.ws.httphelper.model.field.ParseFieldType;
import org.ws.httphelper.model.http.ResponseFuture;
import org.ws.httphelper.model.template.RequestTemplate;
import org.ws.httphelper.template.DefaultRequestTemplate;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * Downloads the title index of every "database kernel monthly report" published by the
 * Alibaba Cloud RDS database kernel team, so the titles can be browsed in one place.
 * Highly recommended, high-quality MySQL articles: http://mysql.taobao.org/monthly
 * Thanks to the Alibaba Cloud RDS database kernel team for sharing.
 */
public class MySqlTaobaoOrg {

    private static final String ROOT_URL = "http://mysql.taobao.org/monthly";

    /** Name of the top-level parse field registered in {@link #template()}. */
    private static final String CONTENT_KEY = "content";

    /** Name of the child parse field (registered under the content field) holding the links. */
    private static final String LINK_LIST_KEY = "linkList";

    /** First issue requested by this example: 2014/08. */
    private static final int FIRST_YEAR = 2014;
    private static final int FIRST_MONTH = 8;

    /** Last issue requested by this example: 2020/09. */
    private static final int LAST_YEAR = 2020;
    private static final int LAST_MONTH = 9;

    /**
     * Runs all requests and prints the collected lines to standard output, one per line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        MySqlTaobaoOrg mySqlTaobaoOrg = new MySqlTaobaoOrg();
        // Execute the requests.
        List<String> result = mySqlTaobaoOrg.doRequest();
        // Print the result for inspection.
        System.out.println(StringUtils.join(result, "\n"));
    }

    /**
     * Defines the request template: the default OkHttp GET/HTML template for {@link #ROOT_URL}
     * with one HTML parse field, {@code content}, that has a child list field {@code linkList}.
     *
     * @return the configured template
     */
    public RequestTemplate template() {
        // Use the default template.
        RequestTemplate template = DefaultRequestTemplate.okHttpGetHtml(ROOT_URL);
        // Take the content found under the element(s) matching the CSS selector ".posts".
        ParseField content =
                new ParseField(CONTENT_KEY, ParseFieldType.OBJECT, ".posts", ExpressionType.CSS);
        // Collect all <a> tags (their titles) as a list.
        ParseField linkList =
                new ParseField(LINK_LIST_KEY, ParseFieldType.LIST, "a", ExpressionType.CSS);
        // Register the list as a child of the content field.
        content.addChildField(linkList);
        // Add the parse field to the template.
        template.getRequestConfig().addParseHtmlField(content);
        return template;
    }

    /**
     * Requests every path from {@link #buildPathList()} and collects the parsed links.
     *
     * <p>For each request that succeeds and yields a non-empty link list, the result receives
     * one separator entry ({@code ROOT_URL + path}) followed by that request's links. Requests
     * that fail, or whose output does not have the expected shape, are skipped.
     *
     * @return the collected lines, in request order; never {@code null}
     */
    public List<String> doRequest() {
        // Build the HttpHelper from the template.
        HttpHelper httpHelper = HttpHelperBuilder.builder().builderByTemplate(template());
        List<String> all = new ArrayList<>();
        for (String path : buildPathList()) {
            // Execute the request.
            ResponseFuture responseFuture = httpHelper.request(path);
            if (!responseFuture.isSuccess()) {
                // Best effort: a failed request only means this issue is left out.
                continue;
            }
            List<String> links = extractLinks(responseFuture.getOutput());
            if (CollectionUtils.isNotEmpty(links)) {
                // Add the issue URL first so it acts as a separator between issues.
                all.add(ROOT_URL + path);
                // Then add every link of that issue.
                all.addAll(links);
            }
        }
        return all;
    }

    /**
     * Pulls the link list out of a response output object.
     *
     * <p>The original author's note states that the output defaults to a {@code Map} when no
     * output type is specified. Every level is type-checked here, so a missing, {@code null}
     * or unexpectedly typed value produces an empty list instead of a
     * {@code NullPointerException} or {@code ClassCastException}.
     *
     * @param output the value returned by {@code ResponseFuture.getOutput()}; may be {@code null}
     * @return a new list with the string form of each link ({@code null} elements are kept
     *         as {@code null}); empty when the expected structure is absent
     */
    private static List<String> extractLinks(Object output) {
        List<String> links = new ArrayList<>();
        if (!(output instanceof Map)) {
            return links;
        }
        // Take "content".
        Object content = ((Map<?, ?>) output).get(CONTENT_KEY);
        if (!(content instanceof Map)) {
            return links;
        }
        // Take "linkList" under "content".
        Object rawLinks = ((Map<?, ?>) content).get(LINK_LIST_KEY);
        if (!(rawLinks instanceof List)) {
            return links;
        }
        for (Object link : (List<?>) rawLinks) {
            // Copy element by element instead of an unchecked cast to List<String>.
            links.add(link == null ? null : link.toString());
        }
        return links;
    }

    /**
     * Generates the request sub-paths (the paths are computed rather than parsed from the
     * HTML index page).
     *
     * <p>Produces one path of the form {@code /yyyy/MM/} per month, from
     * {@code FIRST_YEAR/FIRST_MONTH} to {@code LAST_YEAR/LAST_MONTH} inclusive.
     *
     * @return the sub-paths in chronological order
     */
    private List<String> buildPathList() {
        List<String> paths = new ArrayList<>(100);
        for (int year = FIRST_YEAR; year <= LAST_YEAR; year++) {
            // Only the first and last year are partial; every other year covers 12 months.
            int fromMonth = (year == FIRST_YEAR) ? FIRST_MONTH : 1;
            int toMonth = (year == LAST_YEAR) ? LAST_MONTH : 12;
            for (int month = fromMonth; month <= toMonth; month++) {
                // Zero-pad single-digit months to two digits.
                paths.add("/" + year + "/" + (month < 10 ? "0" : "") + month + "/");
            }
        }
        return paths;
    }

}
